notebook.community

Edit and run



In [ ]:

    
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import elasticsearch
import unicodedata
import csv

# Client used by every request in this notebook; it targets the node listed below.
es_hosts = ["localhost:9200"]
es = elasticsearch.Elasticsearch(es_hosts)

Set up your variables



In [ ]:

    
# Settings a reader is expected to tune before running the rest of the notebook.

# Elasticsearch index that is searched.
index_name = "weiboscope"

# Keyword identifying the meme of interest.
meme_keywords = "杜甫很忙"

# Output file for the exported hits: the keyword with a ".csv" extension.
csv_file = meme_keywords + ".csv"

Launch the request



In [25]:

    
# Build the match query once, from the keyword set in the configuration cell,
# so both requests below are guaranteed to search for the same text.
# The keyword is wrapped in single quotes, exactly as the query was written before.
query_body = {"query": {"match": {"text": "'%s'" % meme_keywords}}}

# First request: only used to read how many documents match in total.
res = es.search(index=index_name, body=query_body)
data_size = res['hits']['total']
# Elasticsearch 7+ reports the total as {"value": N, "relation": ...}
# instead of a bare integer; unwrap it so the %d format below keeps working.
if isinstance(data_size, dict):
    data_size = data_size['value']
print("Total %d Hits" % data_size)

# Second request: fetch the documents that will be exported.
# `size` caps the number of hits returned, so at most 10 hits are exported.
res2 = es.search(index=index_name, body=query_body, size=10)
results = res2['hits']['hits']
print("%d Hits Retreived" % len(results))









    



Total 705362 Hits
10 Hits Retreived



In [26]:

    
# get headers
# Column names are the field names of the first retrieved hit. They are read
# from `results` directly (not from a loop variable defined in a later cell),
# so this cell also works on a fresh kernel. With no hits there are no columns.
headers = list(results[0]["_source"]) if results else []



In [50]:

    
# then open a csv file, and loop through the results, writing to the csv
#
# The csv module expects different file modes depending on the interpreter:
#   - Python 2: a binary file, with text cells encoded to UTF-8 byte strings;
#   - Python 3: a text file opened with newline='' and an explicit encoding.
is_py2 = str is bytes
if is_py2:
    csvfile = open(csv_file, 'wb')
else:
    csvfile = open(csv_file, 'w', newline='', encoding='utf-8')

with csvfile:
    filewriter = csv.writer(csvfile)
    # create column header row
    filewriter.writerow(headers)
    for sample in results:
        source = sample["_source"]
        row = []
        # Walk `headers` rather than the document's own keys so every value
        # lands under its column even when documents differ in key order or
        # fields; a field missing from a document is written as an empty cell.
        for field in headers:
            data = source.get(field, "")
            # `unicode` only exists on Python 2; the `is_py2` guard
            # short-circuits before the name is looked up on Python 3.
            if is_py2 and isinstance(data, unicode):
                data = data.encode("utf-8")
            row.append(data)

        filewriter.writerow(row)

print("Done. Data saved in %s" % csv_file)









    



Done. Data saved in 杜甫很忙.csv



In [ ]: